Section 2: DESeq2 analysis

DESeq2 analyses steps

This section uses the counts data (for selected datasets) generated in Section 1 to differential expression (DE) analyses using DESeq2. Breifly, the counts data is imported in R, batch corrected using ComBat_seq, DE performed for various contrasts using DESeq2, results visualized as volcano plots, and cell enrichment performed using PlacentaCellEnrich.

Prerequisites

R packages required for this section are loaded

setwd("~/github/amnion.vs.other_RNASeq")
# load the modules
library(sva)
library(tidyverse)
library(DESeq2)
library(vsn)
library(pheatmap)
library(ggrepel)
library(RColorBrewer)
library(reshape2)
require(biomaRt)
library(EnhancedVolcano)
library(TissueEnrich)
library(plotly)
library(DT)
library(cowplot)
library(biomaRt)

Import datasets

The counts data and its associated metadata (coldata) are imported for analyses

counts = '~/github/amnion.vs.other_RNASeq/assets/counts-subset-v4.txt'
groupFile = '~/github/amnion.vs.other_RNASeq/assets/batch-subset-v4.txt'
coldata <-
  read.csv(
    groupFile,
    row.names = 1,
    sep = "\t",
    stringsAsFactors = TRUE
  )
cts <- as.matrix(read.csv(counts, sep = "\t", row.names = "gene.ids"))

inspect the coldata

DT::datatable(coldata)

reorder columns of cts according to coldata rows. Check if it both files matches.

colnames(cts)
#>  [1] "Naive_H9_hESCs_1"                "Naive_H9_hESCs_2"               
#>  [3] "nTE_D1.Naive_H9_hESCs_1"         "nTE_D1.Naive_H9_hESCs_2"        
#>  [5] "nTE_D2.Naive_H9_hESCs_1"         "nTE_D2.Naive_H9_hESCs_2"        
#>  [7] "nTE_D3.Naive_H9_hESCs_1"         "nTE_D3.Naive_H9_hESCs_2"        
#>  [9] "nCT_P3.Naive_H9_hESCs_1"         "nCT_P3.Naive_H9_hESCs_2"        
#> [11] "nCT_P10.Naive_H9_hESCs_1"        "nCT_P10.Naive_H9_hESCs_2"       
#> [13] "nCT_P15.Naive_H9_hESCs_1"        "nCT_P15.Naive_H9_hESCs_2"       
#> [15] "cR_nCT_P15.Naive_H9_hESCs_1"     "cR_nCT_P15.Naive_H9_hESCs_2"    
#> [17] "nCT_P15.409B2_iPSC_hESCs_1"      "nCT_P15.409B2_iPSC_hESCs_2"     
#> [19] "Placenta.derived_tbSCs_CT30_Ex1" "Placenta.derived_tbSCs_CT30_Ex2"
#> [21] "nST.Naive_H9_Ex1"                "nST.Naive_H9_Ex2"               
#> [23] "nEVT.Naive_H9_Ex1"               "nEVT.Naive_H9_Ex2"              
#> [25] "Primed_H9_hESCs_1"               "Primed_H9_hESCs_2"              
#> [27] "pBAP_D1.Primed_H9_hESCs_1"       "pBAP_D1.Primed_H9_hESCs_2"      
#> [29] "pBAP_D2.Primed_H9_hESCs_1"       "pBAP_D2.Primed_H9_hESCs_2"      
#> [31] "pBAP_D3.Primed_H9_hESCs_1"       "pBAP_D3.Primed_H9_hESCs_2"      
#> [33] "CytoTB_7_gestational_wks_1"      "CytoTB_7_gestational_wks_2"     
#> [35] "CytoTB_9_gestational_wks_1"      "CytoTB_11_gestational_wks_1"    
#> [37] "hESC_H1_STB_gt70um_D8_BAP_1"     "hESC_H1_STB_gt70um_D8_BAP_2"    
#> [39] "hESC_H1_STB_gt70um_D8_BAP_3"     "hESC_H1_STB_40.70um_D8_BAP_1"   
#> [41] "hESC_H1_STB_40.70um_D8_BAP_2"    "hESC_H1_STB_40.70um_D8_BAP_3"   
#> [43] "hESC_H1_STB_lt40um_D8_BAP_1"     "hESC_H1_STB_lt40um_D8_BAP_2"    
#> [45] "hESC_H1_STB_lt40um_D8_BAP_3"     "hESC_H1_D8_MEF.CM.and.FGF2_1"   
#> [47] "hESC_H1_D8_MEF.CM.and.FGF2_2"    "hESC_H1_D8_MEF.CM.and.FGF2_3"
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]

Batch correction

Using combat seq (SVA package) run batch correction - using bioproject ids as variable (dataset origin).

cov1 <- as.factor(coldata$BioProject)
adjusted_counts <- ComBat_seq(cts, batch = cov1, group = NULL)
#> Found 2 batches
#> Using null model in ComBat-seq.
#> Adjusting for 0 covariate(s) or covariate level(s)
#> Estimating dispersions
#> Fitting the GLM model
#> Shrinkage off - using GLM estimates for parameters
#> Adjusting the data
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]

DESeq2

The batch corrected read counts are then used for running DESeq2 analyses

dds <- DESeqDataSetFromMatrix(countData = adjusted_counts,
                              colData = coldata,
                              design = ~ condition)
vsd <- vst(dds, blind = FALSE)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds <- DESeq(dds)
dds
#> class: DESeqDataSet 
#> dim: 16496 48 
#> metadata(1): version
#> assays(4): counts mu H cooks
#> rownames(16496): ENSG00000000003.15 ENSG00000000005.6 ...
#>   ENSG00000288695.1 ENSG00000288698.1
#> rowData names(106): baseMean baseVar ... deviance maxCooks
#> colnames(48): cR_nCT_P15.Naive_H9_hESCs_1 cR_nCT_P15.Naive_H9_hESCs_2
#>   ... hESC_H1_STB_lt40um_D8_BAP_2 hESC_H1_STB_lt40um_D8_BAP_3
#> colData names(3): BioProject condition sizeFactor

Various contrasts are setup as follows (a total of 9 combinations)

res.nCTvsBAP <- results(dds,
                        contrast = c(
                          "condition",
                          "nCT_P10.Naive_H9_hESCs",
                          "pBAP_D3.Primed_H9_hESCs"
                        ))
res.nTEvsSTB <- results(dds,
                        contrast = c(
                          "condition",
                          "nTE_D3.Naive_H9_hESCs",
                          "hESC_H1_STB_gt70um_D8_BAP"
                        ))
res.nCTvsSTB <- results(dds,
                        contrast = c(
                          "condition",
                          "nCT_P10.Naive_H9_hESCs",
                          "hESC_H1_STB_gt70um_D8_BAP"))
res.nTEvsBAP <- results(dds,
                        contrast = c(
                          "condition",
                          "nTE_D3.Naive_H9_hESCs",
                          "pBAP_D3.Primed_H9_hESCs"
                        ))
res.nTEvsL40 <- results(dds,
                        contrast = c(
                          "condition",
                          "nTE_D3.Naive_H9_hESCs",
                          "hESC_H1_STB_lt40um_D8_BAP"
                        ))
res.nCTvsL40 <- results(dds,
                        contrast = c(
                          "condition",
                          "nCT_P10.Naive_H9_hESCs",
                          "hESC_H1_STB_lt40um_D8_BAP"
                          ))
res.BAPvsL40 <- results(dds,
                        contrast = c(
                          "condition",
                          "pBAP_D3.Primed_H9_hESCs",
                          "hESC_H1_STB_lt40um_D8_BAP"
                          ))
res.BAPvsSTB <- results(dds, 
                        contrast=c(
                          "condition",
                          "pBAP_D3.Primed_H9_hESCs",
                          "hESC_H1_STB_gt70um_D8_BAP"
                          ))
res.STBvsL40 <- results(dds,
                        contrast=c(
                          "condition",
                          "hESC_H1_STB_gt70um_D8_BAP",
                          "hESC_H1_STB_lt40um_D8_BAP"
                          ))

Using a function we will save results as well as generate variable to hold the gene lists for running PCE later-on.

processDE <- function(restable, fileName) {
  restable <- restable[order(restable$padj),]
  outTable <- merge(as.data.frame(restable),
                    as.data.frame(counts(dds, normalized = TRUE)),
                    by = "row.names",
                    sort = FALSE)
  names(outTable)[1] <- "Gene"
  newName = paste0("assets/DESeq2results-", fileName, "_fc.tsv")
  write_delim(outTable, file = newName, delim = "\t")
  upName <- paste0(fileName, ".up")
  upName <- outTable %>% filter(log2FoldChange >= 1) %>%
    filter(padj <= 0.05) %>%
    arrange(desc(log2FoldChange)) %>%
    dplyr::select(Gene)
  downName <- paste0(fileName, ".donw")
  downName <- outTable %>% filter(log2FoldChange <= 1) %>%
    filter(padj <= 0.05) %>%
    arrange(desc(log2FoldChange)) %>%
    dplyr::select(Gene)
}

The results are saved as tsv files

processDE(res.nCTvsBAP, "nCTvsBAP")
processDE(res.nCTvsSTB, "nCTvsSTB")
processDE(res.nTEvsSTB, "nTEvsSTB")
processDE(res.nTEvsBAP, "nTEvsBAP")
processDE(res.nTEvsL40, "nTEvsL40")
processDE(res.nCTvsL40, "nCTvsL40")
processDE(res.BAPvsL40, "BAPvsL40")
processDE(res.STBvsL40, "STBvsL40")
processDE(res.BAPvsSTB, "BAPvsSTB")

Creating gene-lists

The gene-lists have ensembl gene-id-version. We need them in just gene-id. We also need other metadata later for these lists. From ensembl we will download metadata and attach to these lists.

ensembl = useMart("ENSEMBL_MART_ENSEMBL")
listDatasets(ensembl) %>%
  filter(str_detect(description, "Human"))
#>                 dataset              description    version
#> 1 hsapiens_gene_ensembl Human genes (GRCh38.p13) GRCh38.p13
ensembl = useDataset("hsapiens_gene_ensembl", mart = ensembl)
listFilters(ensembl) %>%
  filter(str_detect(name, "ensembl"))
#>                            name
#> 1               ensembl_gene_id
#> 2       ensembl_gene_id_version
#> 3         ensembl_transcript_id
#> 4 ensembl_transcript_id_version
#> 5            ensembl_peptide_id
#> 6    ensembl_peptide_id_version
#> 7               ensembl_exon_id
#>                                                      description
#> 1                       Gene stable ID(s) [e.g. ENSG00000000003]
#> 2       Gene stable ID(s) with version [e.g. ENSG00000000003.15]
#> 3                 Transcript stable ID(s) [e.g. ENST00000000233]
#> 4 Transcript stable ID(s) with version [e.g. ENST00000000233.10]
#> 5                    Protein stable ID(s) [e.g. ENSP00000000233]
#> 6     Protein stable ID(s) with version [e.g. ENSP00000000233.5]
#> 7                              Exon ID(s) [e.g. ENSE00000000003]
filterType <- "ensembl_gene_id_version"
filterValues <- rownames(cts)
listAttributes(ensembl) %>%
  head(20)
#>                             name                                description
#> 1                ensembl_gene_id                             Gene stable ID
#> 2        ensembl_gene_id_version                     Gene stable ID version
#> 3          ensembl_transcript_id                       Transcript stable ID
#> 4  ensembl_transcript_id_version               Transcript stable ID version
#> 5             ensembl_peptide_id                          Protein stable ID
#> 6     ensembl_peptide_id_version                  Protein stable ID version
#> 7                ensembl_exon_id                             Exon stable ID
#> 8                    description                           Gene description
#> 9                chromosome_name                   Chromosome/scaffold name
#> 10                start_position                            Gene start (bp)
#> 11                  end_position                              Gene end (bp)
#> 12                        strand                                     Strand
#> 13                          band                             Karyotype band
#> 14              transcript_start                      Transcript start (bp)
#> 15                transcript_end                        Transcript end (bp)
#> 16      transcription_start_site             Transcription start site (TSS)
#> 17             transcript_length Transcript length (including UTRs and CDS)
#> 18                transcript_tsl             Transcript support level (TSL)
#> 19      transcript_gencode_basic                   GENCODE basic annotation
#> 20             transcript_appris                          APPRIS annotation
#>            page
#> 1  feature_page
#> 2  feature_page
#> 3  feature_page
#> 4  feature_page
#> 5  feature_page
#> 6  feature_page
#> 7  feature_page
#> 8  feature_page
#> 9  feature_page
#> 10 feature_page
#> 11 feature_page
#> 12 feature_page
#> 13 feature_page
#> 14 feature_page
#> 15 feature_page
#> 16 feature_page
#> 17 feature_page
#> 18 feature_page
#> 19 feature_page
#> 20 feature_page
attributeNames <- c('ensembl_gene_id_version',
                    'ensembl_gene_id',
                    'external_gene_name')
annot <- getBM(
  attributes = attributeNames,
  filters = filterType,
  values = filterValues,
  mart = ensembl
)
isDup <- duplicated(annot$ensembl_gene_id)
dup <- annot$ensembl_gene_id[isDup]
annot <- annot[!annot$ensembl_gene_id %in% dup, ]

We will create separate lists for up-regulated genes and for down-regulated genes. function for up-regulated genes.

pceUP <- function(restable) {
  restable <- restable[order(restable$padj),]
  outTable <- merge(as.data.frame(restable),
                    as.data.frame(counts(dds, normalized = TRUE)),
                    by = "row.names",
                    sort = FALSE)
  names(outTable)[1] <- "Gene"
  upName <- outTable %>% filter(log2FoldChange >= 1) %>%
    filter(padj <= 0.05) %>%
    arrange(desc(log2FoldChange)) %>%
    dplyr::select(Gene)
  upNew <- annot[annot$ensembl_gene_id_version %in% upName$Gene, ]
  upList <- upNew$ensembl_gene_id
  return(upList)
}

function for down-regulated genes.

pceDOWN <- function(restable) {
  restable <- restable[order(restable$padj),]
  outTable <- merge(as.data.frame(restable),
                    as.data.frame(counts(dds, normalized = TRUE)),
                    by = "row.names",
                    sort = FALSE)
  names(outTable)[1] <- "Gene"
  downName <- outTable %>% filter(log2FoldChange <= -1) %>%
    filter(padj <= 0.05) %>%
    arrange(desc(log2FoldChange)) %>%
    dplyr::select(Gene)
  downNew <-
    annot[annot$ensembl_gene_id_version %in% downName$Gene, ]
  downList <- downNew$ensembl_gene_id
  return(downList)
}

Run the functions on the DESeq2 results object

nCTvsBAP.up <- pceUP(res.nCTvsBAP)
nCTvsSTB.up <- pceUP(res.nCTvsSTB)
nTEvsSTB.up <- pceUP(res.nTEvsSTB)
nTEvsBAP.up <- pceUP(res.nTEvsBAP)
nTEvsL40.up <- pceUP(res.nTEvsL40)
nCTvsL40.up <- pceUP(res.nCTvsL40)
BAPvsL40.up <- pceUP(res.BAPvsL40)
STBvsL40.up <- pceUP(res.STBvsL40)
BAPvsSTB.up <- pceUP(res.BAPvsSTB)
nCTvsBAP.down <- pceDOWN(res.nCTvsBAP)
nCTvsSTB.down <- pceDOWN(res.nCTvsSTB)
nTEvsSTB.down <- pceDOWN(res.nTEvsSTB)
nTEvsBAP.down <- pceDOWN(res.nTEvsBAP)
nTEvsL40.down <- pceDOWN(res.nTEvsL40)
nCTvsL40.down <- pceDOWN(res.nCTvsL40)
BAPvsL40.down <- pceDOWN(res.BAPvsL40)
STBvsL40.down <- pceDOWN(res.STBvsL40)
BAPvsSTB.down <- pceDOWN(res.BAPvsSTB)

PlacentaCellEnrich (PCE)

The above gene lists are used for running PCE. The function used for running PCE is below

# load the PCE data
l <-
  load(file = "~/TutejaLab/PlacentaEnrich/combine-test-expression1.Rdata")
humanGeneMapping <- dataset$GRCH38$humanGeneMapping
d <- dataset$PlacentaDeciduaBloodData
data <- d$expressionData
cellDetails <- d$cellDetails
# create a run PCE function
runpce <- function(inputgenelist, title, shade) {
  inputGenes <- toupper(inputgenelist)
  expressionData <-
    data[intersect(row.names(data), humanGeneMapping$Gene), ]
  se <-
    SummarizedExperiment(
      assays = SimpleList(as.matrix(expressionData)),
      rowData = row.names(expressionData),
      colData = colnames(expressionData)
    )
  cellSpecificGenesExp <-
    teGeneRetrieval(se, expressedGeneThreshold = 1)
  print(length(inputGenes))
  gs <- GeneSet(geneIds = toupper(inputGenes))
  output2 <- teEnrichmentCustom(gs, cellSpecificGenesExp)
  enrichmentOutput <- setNames(data.frame(assay(output2[[1]]),
                                          row.names = rowData(output2[[1]])[, 1]),
                               colData(output2[[1]])[, 1])
  row.names(cellDetails) <- cellDetails$RName
  enrichmentOutput$Tissue <-
    cellDetails[row.names(enrichmentOutput), "CellName"]
  ggplot(data = enrichmentOutput,
         mapping = aes(x = reorder (Tissue,-Log10PValue), Log10PValue)) +
    geom_bar(stat = "identity",
             color = shade,
             fill = shade) + theme_classic(base_size = 10) +
    theme(
      axis.text.x = element_text(
        angle = 45,
        vjust = 1,
        hjust = 1,
        size = 10
      ),
      plot.title = element_text(size = 12),
      plot.margin = unit(c(1, 1, 1, 2), "cm")
    ) +
    labs(x = "",
         y = "-log10 p-value (adj.)",
         title = title) +
    scale_y_continuous(expand = expansion(mult = c(0, .1)))
}

The PCE is run on each of the gene list as follows (up and down pair are displayed together)

PCE plots

PCE: H9_nCT_P10_Io vs. H9_pBAP_D3_Io

p <-
  runpce(nCTvsBAP.up , "overexpressed in H9_nCT_P10_Io", "#F16746")
#> [1] 1381
q <-
  runpce(nCTvsBAP.down , "overexpressed in H9_pBAP_D3_Io", "#0773B2")
#> [1] 2821
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.1: H9_nCT_P10_Io vs. H9_pBAP_D3_Io

Fig 2.1: H9_nCT_P10_Io vs. H9_pBAP_D3_Io

PCE: H9_nCT_P10_Io vs. H1_BAP_D8_>70_Yabe

p <-
  runpce(nCTvsSTB.up , "overexpressed in H9_nCT_P10_Io)", "#F16746")
#> [1] 563
q <-
  runpce(nCTvsSTB.down ,
         "overexpressed in H1_BAP_D8_>70_Yabe",
         "#5C823A")
#> [1] 1766
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.2: H9_nCT_P10_Io vs. H1_BAP_D8_>70_Yabe

Fig 2.2: H9_nCT_P10_Io vs. H1_BAP_D8_>70_Yabe

PCE: H9_nTE_D3_Io vs. H1_BAP_D8_>70_Yabe

p <- runpce(nTEvsSTB.up , "overexpressed in H9_nTE_D3_Io", "#C79D2E")
#> [1] 1069
q <-
  runpce(nTEvsSTB.down ,
         "overexpressed in H1_BAP_D8_>70_Yabe",
         "#5C823A")
#> [1] 2070
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.3: H9_nTE_D3_Io vs. H1_BAP_D8_>70_Yabe

Fig 2.3: H9_nTE_D3_Io vs. H1_BAP_D8_>70_Yabe

PCE: H9_nTE_D3_Io vs. pBAP_D3_Io

p <- runpce(nTEvsBAP.up , "overexpressed in H9_nTE_D3_Io", "#C79D2E")
#> [1] 886
q <-
  runpce(nTEvsBAP.down , "overexpressed in H9_pBAP_D3_Io", "#0773B2")
#> [1] 2160
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.4: H9_nTE_D3_Io vs. pBAP_D3_Io

Fig 2.4: H9_nTE_D3_Io vs. pBAP_D3_Io

PCE: H9_nTE_D3_Io vs. H1_BAP_D8_<40_Yabe

p <- runpce(nTEvsL40.up , "overexpressed in H9_nTE_D3_Io", "#C79D2E")
#> [1] 1183
q <-
  runpce(nTEvsL40.down ,
         "overexpressed in H1_BAP_D8_<40_Yabe",
         "#AFBF38")
#> [1] 2260
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.5: H9_nTE_D3_Io vs. H1_BAP_D8_<40_Yabe

Fig 2.5: H9_nTE_D3_Io vs. H1_BAP_D8_<40_Yabe

PCE: H9_nCT_P10_Io vs. H1_BAP_D8_<40_Yabe

p <-
  runpce(nCTvsL40.up , "overexpressed in H9_nCT_P10_Io", "#F16746")
#> [1] 650
q <-
  runpce(nCTvsL40.down ,
         "overexpressed in H1_BAP_D8_<40_Yabe",
         "#AFBF38")
#> [1] 2095
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.6: H9_nCT_P10_Io vs. H1_BAP_D8_<40_Yabe

Fig 2.6: H9_nCT_P10_Io vs. H1_BAP_D8_<40_Yabe

PCE: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe

p <-
  runpce(BAPvsL40.up , "overexpressed in H9_pBAP_D3_Io", "#0773B2")
#> [1] 1544
q <-
  runpce(BAPvsL40.down ,
         "overexpressed in H1_BAP_D8_<40_Yabe",
         "#AFBF38")
#> [1] 1351
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.7: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe

Fig 2.7: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe

PCE: H1_BAP_D8_>70_Yabe vs. H1_BAP_D8_<40_Yabe

p <-
  runpce(STBvsL40.up ,
         "overexpressed in H1_BAP_D8_>70_Yabe",
         "#5C823A")
#> [1] 1266
q <-
  runpce(STBvsL40.down ,
         "overexpressed in H1_BAP_D8_<40_Yabe",
         "#AEBD38")
#> [1] 1157
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.8: H1_BAP_D8_>70_Yabe vs. H1_BAP_D8_<40_Yabe

Fig 2.8: H1_BAP_D8_>70_Yabe vs. H1_BAP_D8_<40_Yabe

PCE: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe

p <-
  runpce(BAPvsSTB.up , "overexpressed in H9_pBAP_D3_Io", "#0773B2")
#> [1] 1732
q <-
  runpce(BAPvsSTB.down ,
         "overexpressed in H1_BAP_D8_>70_Yabe",
         "#5C823A")
#> [1] 1424
panel_plot <- plot_grid(p,
                        q,
                        labels = c("A", "B"),
                        ncol = 1,
                        nrow = 2)
panel_plot
Fig 2.9: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe

Fig 2.9: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe

Volcano Plot function

We will setup a funciton for drawing volcano plots and then run them on the DESeq2 results. The funciton is as shown below

mart <-
  read.csv(
    "~/TutejaLab/amnion_for_ms_20210715/de-analyses/mart-genes.tsv",
    sep = "\t",
    stringsAsFactors = TRUE,
    header = TRUE
  )
myVolPlot <- function(restable, first, second, shade1, shade2) {
  restable <- restable[order(restable$padj),]
  outTable <-
    tibble::rownames_to_column(as.data.frame(restable), "Gene")
  outTable <- merge(
    outTable,
    mart,
    by.x = c("Gene"),
    by.y = c("ensembl_gene_id_version"),
    sort = FALSE
  )
  outTable <- outTable %>%
    mutate_all(na_if, "") %>%
    mutate_all(na_if, " ") %>%
    mutate(gene_symbol = coalesce(gene_symbol, Gene))
  outTable$diffexpressed <- "other.genes"
  outTable$diffexpressed[outTable$log2FoldChange >= 1 &
                           outTable$padj <= 0.05] <-
    paste0("Higher expression in ", first)
  outTable$diffexpressed[outTable$log2FoldChange <= -1 &
                           outTable$padj <= 0.05] <-
    paste0("Higher expression in ", second)
  outTable$delabel <- ""
  outTable$delabel[outTable$log2FoldChange >= 1
                   & outTable$padj <= 0.05
                   &
                     !is.na(outTable$padj)] <-
    outTable$gene_symbol[outTable$log2FoldChange >= 1
                         &
                           outTable$padj <= 0.05
                         &
                           !is.na(outTable$padj)]
  outTable$delabel[outTable$log2FoldChange <= -1
                   & outTable$padj <= 0.05
                   &
                     !is.na(outTable$padj)] <-
    outTable$gene_symbol[outTable$log2FoldChange <= -1
                         &
                           outTable$padj <= 0.05
                         &
                           !is.na(outTable$padj)]
  ggplot(outTable,
         aes(
           x = log2FoldChange,
           y = -log10(padj),
           col = diffexpressed,
           label = delabel
         )) +
    geom_point(alpha = 0.5) +
    theme_classic() +
    scale_color_manual(name = "Expression",
                       values = c(shade1, shade2, "#4d4d4d")) +
    xlab("log2 fold change") +
    ylab("-log10 pvalue (adjusted)") +
    theme(
      legend.text.align = 0,
      legend.position = c(.95, .95),
      legend.justification = c("right", "top"),
      legend.box.just = "right",
      legend.margin = margin(5, 5, 5, 5)
    )
}

Volcano Plots

Running Volcano plots for each comparison is shown below

Volcano plot: H9_nCT_P10_Io vs. H9_pBAP_D3_Io

ggplotly(myVolPlot(
  res.nCTvsBAP,
  "H9_nCT_P10_Io",
  "H9_pBAP_D3_Io",
  "#F16746",
  "#0571b0"
))

Fig 2.10: H9_nCT_P10_Io vs. H9_pBAP_D3_Io

Volcano plot: H9_nCT_P10_Io vs. H1_BAP_D8_>70_Yabe

ggplotly(
  myVolPlot(
    res.nCTvsSTB,
    "H9_nCT_P10_Io",
    "H1_BAP_D8_>70_Yabe",
    "#5C823A",
    "#F16746"
  )
)

Fig 2.11: H9_nCT_P10_Io vs. H1_BAP_D8_>70_Yabe

Volcano plot: H9_nTE_D3_Io vs. H1_BAP_D8_>70_Yabe

ggplotly(
  myVolPlot(
    res.nTEvsSTB,
    "H9_nTE_D3_Io",
    "H1_BAP_D8_>70_Yabe",
    "#5C823A",
    "#C79D2E"
  )
)

Fig 2.12: H9_nTE_D3_Io vs. H1_BAP_D8_>70_Yabe

Volcano plot: H9_nTE_D3_Io vs. H9_pBAP_D3_Io

ggplotly(myVolPlot(
  res.nTEvsBAP,
  "H9_nTE_D3_Io",
  "H9_pBAP_D3_Io",
  "#C79D2E",
  "#0571b0"
))

Fig 2.13: H9_nTE_D3_Io vs. H9_pBAP_D3_Io

Volcano plot: H9_nTE_D3_Io vs. H1_BAP_D8_<40_Yabe

ggplotly(
  myVolPlot(
    res.nTEvsL40,
    "H9_nTE_D3_Io",
    "H1_BAP_D8_<40_Yabe",
    "#AEBD38",
    "#C79D2E"
  )
)

Fig 2.14: H9_nTE_D3_Io vs. H1_BAP_D8_<40_Yabe

Volcano plot: H9_nCT_P10_Io vs. H1_BAP_D8_<40_Yabe

ggplotly(
  myVolPlot(
    res.nCTvsL40,
    "H9_nCT_P10_Io",
    "H1_BAP_D8_<40_Yabe",
    "#AEBD38",
    "#F16746"
  )
)

Fig 2.15: H9_nCT_P10_Io vs. H1_BAP_D8_<40_Yabe

Volcano plot: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe

ggplotly(
  myVolPlot(
    res.BAPvsL40,
    "H9_pBAP_D3_Io",
    "H1_BAP_D8_<40_Yabe",
    "#AEBD38",
    "#0571b0"
  )
)

Fig 2.16: H9_pBAP_D3_Io vs. H1_BAP_D8_<40_Yabe

Volcano plot: H1_BAP_D8_>70_Yabe vs. H1_BAP_D8_<40_Yabe

ggplotly(
  myVolPlot(
    res.STBvsL40,
    "H1_BAP_D8_>70_Yabe",
    "H1_BAP_D8_<40_Yabe",
    "#AEBD38",
    "#5C823A"
  )
)

Fig 2.17: H1_BAP_D8_>70_Yabe vs. H1_BAP_D8_<40_Yabe

Volcano plot: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe

ggplotly(
  myVolPlot(
    res.BAPvsSTB,
    "H9_pBAP_D3_Io",
    "H1_BAP_D8_>70_Yabe",
    "#5C823A",
    "#0571b0"
  )
)

Fig 2.18: H9_pBAP_D3_Io vs. H1_BAP_D8_>70_Yabe

Session Information

sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19042)
#> 
#> Matrix products: default
#> 
#> locale:
#> [1] LC_COLLATE=English_United States.1252 
#> [2] LC_CTYPE=English_United States.1252   
#> [3] LC_MONETARY=English_United States.1252
#> [4] LC_NUMERIC=C                          
#> [5] LC_TIME=English_United States.1252    
#> 
#> attached base packages:
#> [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] cowplot_1.1.1               DT_0.18                    
#>  [3] plotly_4.9.3                TissueEnrich_1.10.1        
#>  [5] GSEABase_1.52.1             graph_1.68.0               
#>  [7] annotate_1.68.0             XML_3.99-0.6               
#>  [9] AnnotationDbi_1.52.0        ensurer_1.1                
#> [11] EnhancedVolcano_1.8.0       biomaRt_2.46.3             
#> [13] reshape2_1.4.4              RColorBrewer_1.1-2         
#> [15] ggrepel_0.9.1               pheatmap_1.0.12            
#> [17] vsn_3.58.0                  DESeq2_1.30.1              
#> [19] SummarizedExperiment_1.20.0 Biobase_2.50.0             
#> [21] MatrixGenerics_1.2.1        matrixStats_0.58.0         
#> [23] GenomicRanges_1.42.0        GenomeInfoDb_1.26.7        
#> [25] IRanges_2.24.1              S4Vectors_0.28.1           
#> [27] BiocGenerics_0.36.1         forcats_0.5.1              
#> [29] stringr_1.4.0               dplyr_1.0.7                
#> [31] purrr_0.3.4                 readr_1.4.0                
#> [33] tidyr_1.1.3                 tibble_3.1.1               
#> [35] ggplot2_3.3.5               tidyverse_1.3.1            
#> [37] sva_3.38.0                  BiocParallel_1.24.1        
#> [39] genefilter_1.72.1           mgcv_1.8-35                
#> [41] nlme_3.1-152               
#> 
#> loaded via a namespace (and not attached):
#>  [1] readxl_1.3.1          backports_1.2.1       BiocFileCache_1.14.0 
#>  [4] plyr_1.8.6            lazyeval_0.2.2        splines_4.0.5        
#>  [7] crosstalk_1.1.1       digest_0.6.27         htmltools_0.5.1.1    
#> [10] fansi_0.4.2           magrittr_2.0.1        memoise_2.0.0        
#> [13] limma_3.46.0          modelr_0.1.8          extrafont_0.17       
#> [16] extrafontdb_1.0       askpass_1.1           rmdformats_1.0.3     
#> [19] prettyunits_1.1.1     colorspace_2.0-1      blob_1.2.1           
#> [22] rvest_1.0.0           rappdirs_0.3.3        haven_2.4.1          
#> [25] xfun_0.22             crayon_1.4.1          RCurl_1.98-1.3       
#> [28] jsonlite_1.7.2        survival_3.2-11       glue_1.4.2           
#> [31] gtable_0.3.0          zlibbioc_1.36.0       XVector_0.30.0       
#> [34] DelayedArray_0.16.3   proj4_1.0-10.1        Rttf2pt1_1.3.8       
#> [37] maps_3.3.0            scales_1.1.1          DBI_1.1.1            
#> [40] edgeR_3.32.1          Rcpp_1.0.6            viridisLite_0.4.0    
#> [43] xtable_1.8-4          progress_1.2.2        bit_4.0.4            
#> [46] preprocessCore_1.52.1 htmlwidgets_1.5.3     httr_1.4.2           
#> [49] ellipsis_0.3.2        farver_2.1.0          pkgconfig_2.0.3      
#> [52] sass_0.4.0            dbplyr_2.1.1          locfit_1.5-9.4       
#> [55] utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1     
#> [58] rlang_0.4.11          munsell_0.5.0         cellranger_1.1.0     
#> [61] tools_4.0.5           cachem_1.0.5          cli_2.5.0            
#> [64] generics_0.1.0        RSQLite_2.2.7         broom_0.7.6          
#> [67] evaluate_0.14         fastmap_1.1.0         yaml_2.2.1           
#> [70] knitr_1.33            bit64_4.0.5           fs_1.5.0             
#> [73] ash_1.0-15            ggrastr_0.2.3         xml2_1.3.2           
#>  [ reached getOption("max.print") -- omitted 35 entries ]